# Import packages, data
import pandas as pd
import altair as alt
alt.data_transformers.enable('default',max_rows=None)
# Import and clean shrimp
shrimp_df = pd.read_csv("../../data/modified/shrimp/seasons.csv")
shrimp_df['Date'] = pd.to_datetime(shrimp_df['Date'])
# Import and clean elevation
elevation_df = pd.read_csv("../../data/raw/USGS_Water_Levels/usgs_water_levels.tsv", sep='\t', skiprows=29).tail(-1)
elevation_df = elevation_df.rename(
columns={
"datetime": "Date",
"178324_62614_00003": "water_elevation_ft",
"178324_62614_00003_cd": "data_quality"
}
)
elevation_df = elevation_df.dropna(subset=['water_elevation_ft'])
elevation_df = elevation_df.drop(columns=['agency_cd', 'site_no', 'data_quality'])
elevation_df['Date'] = pd.to_datetime(elevation_df['Date'])
elevation_df.head()
| Date | water_elevation_ft | |
|---|---|---|
| 1 | 1966-04-15 | 4194.90 |
| 17 | 1966-05-01 | 4194.90 |
| 31 | 1966-05-15 | 4194.90 |
| 48 | 1966-06-01 | 4194.90 |
| 62 | 1966-06-15 | 4194.60 |
shrimp_df.head()
| Date | Cysts | Nauplii | Juveniles | Males | Total Females | Females | Females with Eggs/Naups | Females with Cysts | Season | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-01-12 | 21.47 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 2022-23 |
| 1 | 2023-01-04 | 17.62 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 2022-23 |
| 2 | 2022-12-19 | 20.48 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 2022-23 |
| 3 | 2022-12-05 | 127.81 | 0.00 | 0.00 | 0.02 | 0.03 | 0.02 | 0.01 | 0.0 | 2022-23 |
| 4 | 2022-11-17 | 98.76 | 0.41 | 0.37 | 0.46 | 0.46 | 0.16 | 0.09 | 0.2 | 2022-23 |
# Join datasets
df = elevation_df.join(shrimp_df.set_index("Date"), on="Date")
# Fill missing data in Season
df.loc[df['Date'] < min(shrimp_df['Date']), 'Season'] = "Not recorded"
df.loc[df['Date'] > max(shrimp_df['Date']), 'Season'] = "Not recorded"
df['Season'] = df['Season'].fillna(method="ffill")
df.head()
| Date | water_elevation_ft | Cysts | Nauplii | Juveniles | Males | Total Females | Females | Females with Eggs/Naups | Females with Cysts | Season | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1966-04-15 | 4194.90 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Not recorded |
| 17 | 1966-05-01 | 4194.90 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Not recorded |
| 31 | 1966-05-15 | 4194.90 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Not recorded |
| 48 | 1966-06-01 | 4194.90 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Not recorded |
| 62 | 1966-06-15 | 4194.60 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Not recorded |
# Write to csv
df.to_csv("../../data/modified/shrimp/seasons_levels.csv")
df_filtered = df[df['Season'] != "Not recorded"]
df_filtered = pd.melt(
df_filtered,
id_vars=['Season', 'Date', 'water_elevation_ft'],
value_vars=[
'Nauplii', 'Juveniles', 'Males',
'Total Females', 'Females', 'Females with Eggs/Naups',
'Females with Cysts'
],
var_name="brine_type",
value_name="brine_count_per_liter"
).sort_values('Date')
df_filtered = df_filtered.reset_index(drop=True)
df_filtered.head()
| Season | Date | water_elevation_ft | brine_type | brine_count_per_liter | |
|---|---|---|---|---|---|
| 0 | 2010-11 | 2010-08-31 | 4193.52 | Nauplii | 1.30 |
| 1 | 2010-11 | 2010-08-31 | 4193.52 | Juveniles | 0.16 |
| 2 | 2010-11 | 2010-08-31 | 4193.52 | Females with Cysts | 0.16 |
| 3 | 2010-11 | 2010-08-31 | 4193.52 | Total Females | 0.32 |
| 4 | 2010-11 | 2010-08-31 | 4193.52 | Females | 0.09 |
# Elevation line plot
season_selection = alt.selection_single(fields=['Season'], name="Random")
color1 = alt.condition(season_selection,
alt.Color('Season:N', title="Harvest Season"),
alt.value('lightgray'))
elevation_line = (
alt.Chart(df_filtered)
.mark_line()
.encode(
x=alt.X('Date:T'),
y=alt.Y(
'water_elevation_ft:Q',
scale=alt.Scale(domain=[df_filtered['water_elevation_ft'].min(), df_filtered['water_elevation_ft'].max()])
),
color=color1
)
).add_selection(season_selection)
elevation_line.title = "Great Salt Lake Water Elevation"
elevation_line.encoding.x.title = 'Date'
elevation_line.encoding.y.title = 'Water Elevation (ft)'
elevation_line
/Users/joshgladwell/opt/anaconda3/envs/anly503/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. for col_name, dtype in df.dtypes.iteritems():
# Bar plot
# color = alt.condition(season_selection,
# alt.value('steelblue'),
# alt.value('lightgray'))
shrimp_bar = (
alt.Chart(df_filtered)
.mark_bar()
.encode(
y=alt.Y(
'mean(brine_count_per_liter):Q',
scale=alt.Scale(domain=[0, 4])
),
x=alt.X('brine_type:N'),
color=alt.value('#265F58')
)
).transform_filter(season_selection)
shrimp_bar.title ="Average Brine Shrimp Counts for Harvest"
shrimp_bar.encoding.x.title = 'Brine Shrimp'
shrimp_bar.encoding.y.title = 'Count (per liter of lakewater)'
(elevation_line | shrimp_bar).save("./shrimp_chart.html")
elevation_line | shrimp_bar